View on GitHub

data-science

Notebooks and Python about data science

If you like this project please add your Star

GAN on C2C ski touring outing data with application to global warming prediction

Based on ski-touring outing reports from www.camptocamp.org in Haute-Savoie (France) and local temperature data reports in Megève, Generative Adversarial Networks are used to estimate the joint probability distribution of the outing features. GAN model is then modified to exhibit a Bayesian network structure (aka a graphical model) and pre-contrain the model on the type of outing (max elevation, difficulty), and temperature.

Learning goals:

  • Use GAN to model an unknown distribution
  • Mix GAN and graphical models
In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import models, layers, losses, optimizers, metrics, activations
import tensorview as tv
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from plotly import express as px, graph_objects as go, subplots as sp
import itertools
from datetime import datetime
In [2]:
area_title = 'haute-savoie'
log_dir="logs/"
In [98]:
style_true = dict(color='cadetblue')
style_generated = dict(color='fuchsia')
style_fooled = dict(color='blue')
style_warm = dict(color='darkorange')
In [4]:
# Demappers: from numerical to ordinal labels (strings)
rating_unmapper = {0: 'awful', 1: 'poor', 2: 'average', 3: 'good', 4: 'excellent'}
ski_rating_unmapper = {**{3*i + j: f'{i+1}.{j+1}' for i,j in itertools.product(range(5), range (3))}, 15: '5.4', 16: '5.5', 17: '5.6'}

Read data and selet records and features

Data has already been downloaded from the API server of www.camptocamp.org in notebook DownloadC2cOutings (Jupyter / HTML)

In [5]:
df_outings = pd.read_parquet(f'data/C2C/outings_{area_title}.parquet')
In [6]:
# Features
features_c1 = {'ski_rating_num': 'Ski rating (numerical)', 'elevation_max': 'Elevation max'}
temperature_features = {'TEMPERATURE_MORNING_C': 'Morning temperature', 'temp_morning_7d': 'Morning temperature last 7 days', 'temp_morning_30d': 'Morning temperature last 30 days'}
features_c2 = {**temperature_features, 'day_of_season': 'Day of season'}
features_c3 = {'elevation_up_snow': 'Skis on, way up', 'elevation_down_snow': 'Skis off, way down', 'condition_rating_num': 'Condition rating (numerical)'}
used_cols_dict = {**features_c1, **features_c2, **features_c3}
used_cols = list(used_cols_dict.keys())

# Conditions to select outings:
# - Some serious impossible outliers on the elevation (could be in feet)
# - Filter on quality to retain fine, medium and great
condition = (df_outings.elevation_up_snow < 5000) & (df_outings.elevation_down_snow < 5000) & (df_outings.elevation_max < 5000) \
                & (df_outings.elevation_up_snow > 200) & (df_outings.elevation_down_snow > 200) & (df_outings.elevation_max > 200) \
               & ((df_outings.quality == 'fine') | (df_outings.quality == 'medium') | (df_outings.quality == 'great'))

df_sel = df_outings.loc[condition, used_cols]
# Remove rows containing at least 1 na
df_sel = df_sel[(~df_sel.isna().any(axis=1))]
# Permutation
df_sel_perm = np.random.permutation(df_sel)
len(df_sel)
Out[6]:
6656
In [142]:
len(used_cols)
Out[142]:
9
In [7]:
scaler = StandardScaler()
df_sel_scaled = scaler.fit_transform(df_sel_perm)
In [8]:
df_sel.describe()
Out[8]:
ski_rating_num elevation_max TEMPERATURE_MORNING_C temp_morning_7d temp_morning_30d day_of_season elevation_up_snow elevation_down_snow condition_rating_num
count 6656.000000 6656.000000 6656.000000 6656.000000 6656.000000 6656.000000 6656.000000 6656.000000 6656.000000
mean 6.382812 2528.794621 -5.308594 -4.789728 -4.721685 7.707482 1464.071514 1419.389573 2.945463
std 3.071074 632.926708 6.698309 5.377412 4.231169 46.914599 604.572651 538.620869 0.792452
min 0.000000 600.000000 -28.000000 -19.285714 -13.533333 -140.000000 415.000000 415.000000 0.000000
25% 4.000000 2116.000000 -10.000000 -8.321429 -7.366667 -25.000000 1128.000000 1128.000000 2.000000
50% 6.000000 2406.000000 -6.000000 -5.428571 -5.466667 7.000000 1296.500000 1280.000000 3.000000
75% 8.000000 2666.000000 -1.000000 -2.000000 -2.733333 39.000000 1453.000000 1450.000000 3.000000
max 16.000000 4825.000000 16.000000 16.285714 15.366667 167.000000 4810.000000 3842.000000 4.000000
In [151]:
sel_corr = df_sel.corr()
px.imshow(sel_corr, 
          title='Feature correlations in reference (train) data', height=500)

Model

In [10]:
batch_size = 512
latent_dim = 20
num_features = len(used_cols)
In [135]:
generator = models.Sequential([
    layers.Dense(32, input_dim=latent_dim, name='g_1', activation=activations.relu),
    layers.Dropout(0.3),
    layers.Dense(48, name='g_2', activation=activations.relu),
    layers.Dropout(0.2),
    layers.Dense(48, name='g_3', activation=activations.relu),
    layers.Dense(64, name='g_4', activation=activations.relu),
    layers.Dropout(0.2),
    layers.Dense(num_features, name='g_5')
], name='generator')
generator.compile()
generator.summary()
Model: "generator"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
g_1 (Dense)                  (None, 32)                672       
_________________________________________________________________
dropout_21 (Dropout)         (None, 32)                0         
_________________________________________________________________
g_2 (Dense)                  (None, 48)                1584      
_________________________________________________________________
dropout_22 (Dropout)         (None, 48)                0         
_________________________________________________________________
g_3 (Dense)                  (None, 48)                2352      
_________________________________________________________________
g_4 (Dense)                  (None, 64)                3136      
_________________________________________________________________
dropout_23 (Dropout)         (None, 64)                0         
_________________________________________________________________
g_5 (Dense)                  (None, 9)                 585       
=================================================================
Total params: 8,329
Trainable params: 8,329
Non-trainable params: 0
_________________________________________________________________
In [141]:
discriminator = models.Sequential([
    layers.Dense(64, input_dim=num_features, name='d_1', activation=activations.relu),
    layers.Dropout(0.3),
    layers.Dense(48, name='d_2', activation=activations.relu),
    layers.Dropout(0.2),
    layers.Dense(48, name='d_3', activation=activations.relu),
    layers.Dense(32, name='d_4', activation=activations.relu),
    layers.Dropout(0.2),
    layers.Dense(1, name='d_5') # activation='sigmoid', 
], name='discriminator')
discriminator.compile()
In [126]:
tf.keras.utils.plot_model(generator, show_shapes=True, dpi=64)
Out[126]:

Train

In [14]:
epochs = 400
batch_per_epoch = len(df_sel_scaled) // batch_size
loss_object = tf.keras.losses.BinaryCrossentropy(from_logits=True)
In [136]:
def generator_loss(disc_generated_output):
    return loss_object(tf.ones_like(disc_generated_output), disc_generated_output)
In [16]:
def discriminator_loss(disc_real_output, disc_generated_output):

    real_loss = loss_object(tf.ones_like(disc_real_output), disc_real_output)
    generated_loss = loss_object(tf.zeros_like(disc_generated_output), disc_generated_output)

    return real_loss + generated_loss
In [17]:
def get_summary_writer():
    return tf.summary.create_file_writer(log_dir + "fit/" + datetime.now().strftime("%Y%m%d-%H%M%S"))
In [18]:
@tf.function
def train_step(generator, discriminator, 
               generator_optimizer, discriminator_optimizer, 
               generator_latent, batch, 
               epoch, summary_writer):
    with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
        
        gen_latent = generator_latent()
        
        gen_output = generator(gen_latent, training=True)

        disc_real_output = discriminator(batch, training=True)
        disc_generated_output = discriminator(gen_output, training=True)

        gen_loss = generator_loss(disc_generated_output)
        disc_loss = discriminator_loss(disc_real_output, disc_generated_output)

        generator_gradients = gen_tape.gradient(gen_loss, generator.trainable_variables)
        discriminator_gradients = disc_tape.gradient(disc_loss, discriminator.trainable_variables)

    generator_optimizer.apply_gradients(zip(generator_gradients, generator.trainable_variables))
    discriminator_optimizer.apply_gradients(zip(discriminator_gradients, discriminator.trainable_variables))

    with summary_writer.as_default():
        tf.summary.scalar('gen_loss', gen_loss, step=epoch)
        tf.summary.scalar('disc_loss', disc_loss, step=epoch)
        
    return gen_loss, disc_loss
In [19]:
generator_optimizer = tf.keras.optimizers.Adam(3e-4, beta_1=0.4)
discriminator_optimizer = tf.keras.optimizers.Adam(3e-4, beta_1=0.4)
In [20]:
tv_plot = tv.train.PlotMetrics(wait_num=200, columns=2, iter_num=epochs * batch_per_epoch)
summary = get_summary_writer()

def generator_latent():
    return tf.random.normal((batch_size, latent_dim), 0, 1)

for epoch in range(epochs):

    for b in range(batch_per_epoch):
        train_batch = df_sel_scaled[b * batch_size:(b+1) * batch_size]
        
        g_loss, d_loss = train_step(generator, discriminator, 
                                    generator_optimizer, discriminator_optimizer, 
                                    generator_latent, train_batch, 
                                    epoch, summary)
        # Plot
        tv_plot.update({ 'discriminator_loss': d_loss,# 'discriminator_acc': d_acc,
                        'generator_loss': g_loss, # 'generator_acc': g_acc
                       })
        tv_plot.draw()

    # saving (checkpoint) the model every 20 epochs
    #if (epoch + 1) % 20 == 0:
    #  checkpoint.save(file_prefix = checkpoint_prefix)

Helpers

In [123]:
def print_stats(sets, feature, label, format_float=False):
    if format_float:
        print(f'Mean/std {label}:', ', '.join([f'for {key}={data[feature].mean():.1f}/{data[feature].std():.1f}' for key, data in sets.items()]))
    else:
        print(f'Mean/std {label}:', ', '.join([f'for {key}={data[feature].mean():.3g}/{data[feature].std():.3g}' for key, data in sets.items()]))
        
def print_median_stats(sets, feature, label, format_float=False):
    if format_float:
        print(f'Median {label}:', ', '.join([f'for {key}={data[feature].median():.1f}' for key, data in sets.items()]))
    else:
        print(f'Median {label}:', ', '.join([f'for {key}={data[feature].median():.3g}' for key, data in sets.items()]))

Test

In [21]:
gen_latent = np.random.normal(0, 1, (20000, latent_dim))
gen_outings = generator.predict(gen_latent)
gen_outings_unscaled = scaler.inverse_transform(gen_outings)
In [22]:
scores = tf.sigmoid(discriminator.predict(gen_outings))
fooled = scores >= 0.5
In [23]:
fooled.numpy().mean()
Out[23]:
0.25065
In [24]:
df_generated = pd.DataFrame(gen_outings_unscaled, columns=used_cols)
df_generated['ski_rating'] = df_generated['ski_rating_num'].round().clip(0, 17).replace(ski_rating_unmapper)
df_generated['condition_rating'] = df_generated['condition_rating_num'].round().clip(0, 4).replace(rating_unmapper)
In [25]:
df_generated.describe()
Out[25]:
ski_rating_num elevation_max TEMPERATURE_MORNING_C temp_morning_7d temp_morning_30d day_of_season elevation_up_snow elevation_down_snow condition_rating_num
count 20000.000000 20000.000000 20000.000000 20000.000000 20000.000000 20000.000000 20000.000000 20000.000000 20000.000000
mean 5.729935 2413.197266 -5.360540 -5.404138 -5.033844 -0.118180 1493.199707 1494.822998 3.248640
std 2.141164 519.507385 5.506902 4.323305 3.645584 40.383087 454.467651 454.742432 0.564495
min 0.647871 1627.778687 -21.197937 -18.276754 -12.416104 -101.546127 1046.085693 1046.872192 0.710991
25% 3.990028 2076.264038 -9.278057 -8.576509 -7.711310 -33.306271 1233.977112 1235.609039 2.916285
50% 5.553616 2219.525391 -5.660631 -5.587927 -5.670433 -1.906403 1317.274963 1319.664978 3.316270
75% 7.289069 2586.397705 -1.843851 -2.634528 -3.261645 34.890918 1536.654205 1537.049561 3.635854
max 13.141407 5712.603027 16.383104 9.753605 11.213124 112.788986 5241.662598 5261.432617 4.819857
In [147]:
generated_corr = df_generated.corr()
px.imshow(generated_corr, height=500)
In [146]:
corr_diff1 = (generated_corr - sel_corr).abs()
print(f'Mean absolute error on correlations: {corr_diff1.values.mean()}')

px.imshow(corr_diff1, zmax=0.3, color_continuous_scale='viridis',
          title='Absolute correlation differences between train and generated', height=500)
Mean absolute error on correlations: 0.06620136238435569
In [125]:
print_median_stats({'true': df_sel, 'generated': df_generated}, 'elevation_up_snow', 'Skis on, way up', True)
print_median_stats({'true': df_sel, 'generated': df_generated}, 'elevation_down_snow', 'Skis off, way down', True)
print_median_stats({'true': df_sel, 'generated': df_generated}, 'elevation_max', 'Elevation max', True)
fig = sp.make_subplots(rows=1, cols=3, shared_yaxes=True, subplot_titles=['Skis on, way up', 'Skis off, way down', 'Elevation max'], x_title='Elevations [m]')
bins = {'start': 0, 'end': 4800, 'size': 100}
fig.add_trace(go.Histogram(x=df_sel['elevation_up_snow'], xbins=bins, name='true', histnorm='percent', marker=style_true), row=1, col=1)
fig.add_trace(go.Histogram(x=df_generated['elevation_up_snow'], xbins=bins, name='generated', histnorm='percent', marker=style_generated), row=1, col=1)
fig.add_trace(go.Histogram(x=df_sel['elevation_down_snow'], xbins=bins, name='true', histnorm='percent', marker=style_true), row=1, col=2)
fig.add_trace(go.Histogram(x=df_generated['elevation_down_snow'], xbins=bins, name='generated', histnorm='percent', marker=style_generated), row=1, col=2)
fig.add_trace(go.Histogram(x=df_sel['elevation_max'], xbins=bins, name='true', histnorm='percent', marker=style_true), row=1, col=3)
fig.add_trace(go.Histogram(x=df_generated['elevation_max'], xbins=bins, name='generated', histnorm='percent', marker=style_generated), row=1, col=3)
Median Skis on, way up: for true=1296.5, for generated=1317.3
Median Skis off, way down: for true=1280.0, for generated=1319.7
Median Elevation max: for true=2406.0, for generated=2219.5
In [29]:
px.scatter_matrix(df_generated[['elevation_up_snow', 'elevation_down_snow', 'elevation_max']], opacity=0.1, title='Generated elevations correlations', labels=used_cols_dict)
In [87]:
print_stats({'true': df_sel, 'generated': df_generated}, 'day_of_season', 'day of season')

fig = go.Figure(layout=dict(title='Day of season (mid season = Feb 15th)', bargroupgap=0.01, 
                            xaxis=dict(title='Day relative to Feb 15th'), yaxis=dict(title='%')))
fig.add_histogram(x=df_sel.day_of_season, name='true', nbinsx=52, histnorm='percent', marker=style_true)
fig.add_histogram(x=df_generated.day_of_season, name='generated', nbinsx=52, histnorm='percent', marker=style_generated)
Mean/std day of season:  for true=7.71/46.9, for generated=-0.118/40.4
Mean/std day of season, for true=7.71/46.9, for generated=-0.118/40.4

Ski ratings

In [88]:
print_stats({'true': df_sel, 'generated': df_generated}, 'ski_rating_num', 'ski rating (numerical)')
fig = go.Figure(layout=dict(title='Ski rating', bargroupgap=0.01, 
                            xaxis=dict(title='1.1 (easy) to 5.6 (extreme)', categoryorder='array', categoryarray=list(ski_rating_unmapper.values()), type="category"), yaxis=dict(title='%')))
fig.add_histogram(x=df_outings[condition].ski_rating, name='true', histnorm='percent', marker=style_true)
fig.add_histogram(x=df_generated.ski_rating, name='generated', histnorm='percent', marker=style_generated)
Mean/std ski rating (numerical):  for true=6.38/3.07, for generated=5.73/2.14
Mean/std ski rating num, for true=6.38/3.07, for generated=5.73/2.14

Condition ratings

In [90]:
print_stats({'true': df_sel, 'generated': df_generated}, 'condition_rating_num', 'condition rating (numerical)')
fig = go.Figure(layout=dict(title='Condition rating', bargroupgap=0.01, 
                            xaxis=dict(categoryorder='array', categoryarray=list(rating_unmapper.values()), type="category"), yaxis=dict(title='%')))
fig.add_histogram(x=df_outings[condition].condition_rating, name='true', histnorm='percent', marker=style_true)
fig.add_histogram(x=df_generated.condition_rating, name='generated', histnorm='percent', marker=style_generated)
Mean/std condition rating (numerical):  for true=2.95/0.792, for generated=3.25/0.564

Temperatures

In [86]:
print(f'Mean/std morning temperature, for true={df_sel.TEMPERATURE_MORNING_C.mean():.3g}/{df_sel.TEMPERATURE_MORNING_C.std():.3g}, for generated={df_generated.TEMPERATURE_MORNING_C.mean():.3g}/{df_generated.TEMPERATURE_MORNING_C.std():.3g}')
print(f'Mean/std last 7 day morning temperature, for true={df_sel.temp_morning_7d.mean():.3g}/{df_sel.temp_morning_7d.std():.3g}, for generated={df_generated.temp_morning_7d.mean():.3g}/{df_generated.temp_morning_7d.std():.3g}')
print(f'Mean/std last 30 day morning temperature, for true={df_sel.temp_morning_30d.mean():.3g}/{df_sel.temp_morning_30d.std():.3g}, for generated={df_generated.temp_morning_30d.mean():.3g}/{df_generated.temp_morning_30d.std():.3g}')

fig = sp.make_subplots(rows=1, cols=3, shared_yaxes=True, subplot_titles=['today', 'last 7 day', 'last 30 day'],
                      x_title='Morning temperature [°C]')
# fig = go.Figure(layout=dict(title='Trou de la Mouche, morning temperature [°C]', bargroupgap=0.1, yaxis=dict(title='%')))
bins = {'start': -35, 'end': 30, 'size': 1}
fig.add_histogram(x=df_sel.TEMPERATURE_MORNING_C, name='true', histnorm='percent', xbins=bins, marker=style_true, row=1, col=1)
fig.add_histogram(x=df_generated.TEMPERATURE_MORNING_C, name='generated', histnorm='percent', xbins=bins, marker=style_generated, row=1, col=1)
fig.add_histogram(x=df_sel.temp_morning_7d, name='true', histnorm='percent', xbins=bins, marker=style_true, row=1, col=2)
fig.add_histogram(x=df_generated.temp_morning_7d, name='generated', histnorm='percent', xbins=bins, marker=style_generated, row=1, col=2)
fig.add_histogram(x=df_sel.temp_morning_30d, name='true', histnorm='percent', xbins=bins, marker=style_true, row=1, col=3)
fig.add_histogram(x=df_generated.temp_morning_30d, name='generated', histnorm='percent', xbins=bins, marker=style_generated, row=1, col=3)
fig.update_yaxes(title='%')
Mean/std morning temperature, for true=-5.31/6.7, for generated=-5.36/5.51
Mean/std last 7 day morning temperature, for true=-4.79/5.38, for generated=-5.4/4.32
Mean/std last 30 day morning temperature, for true=-4.72/4.23, for generated=-5.03/3.65
In [36]:
px.scatter_matrix(df_generated[['TEMPERATURE_MORNING_C', 'temp_morning_7d', 'temp_morning_30d', 'elevation_up_snow', 'elevation_down_snow']], opacity=0.1, 
                  title='Temperature-elevation correlations', height=600)

Part-2 Adding some independency constraints on the generator

Clique 1 features of the outing are set independent: maximum elevation and ski rating.

Clique 2 features (temperatures, day of season) are dependent on feature of clique 1 of other features.

The other elevations depend on the quantity and quality of snow and are thus dependent on the clique 1, clique 2 features and day within the season (sun light is dependent on the day of the year).

In [37]:
num_features_c1 = len(features_c1)
num_features_c2 = len(features_c2)
num_features_c3 = num_features - num_features_c1 - num_features_c2
num_latent2 = 10
gen_learning_rate2 = 0.002
In [131]:
def make_generator(n_feat: int, name: str, prefix: str, num_latent: int, num_element_base: int):
    return models.Sequential([
        layers.Dense(num_element_base * 2, input_dim=num_latent, name=f'{prefix}_1', activation=activations.relu),
        layers.Dropout(0.3),
        #layers.Dense(num_element_base * 4, name=f'{prefix}_2', activation=activations.relu),
        #layers.Dropout(0.2),
        layers.Dense(num_element_base * 6, name=f'{prefix}_4', activation=activations.relu),
        layers.Dropout(0.2),
        layers.Dense(n_feat, name=f'{prefix}_5')
    ], name=name)

gen_c1 = make_generator(num_features_c1, 'gen_c1', 'g1', num_latent2, 12)
gen_c2 = make_generator(num_features_c2, 'gen_c2', 'g2', num_latent2, 12)
gen_c3 = make_generator(num_features_c3, 'gen_c3', 'g3', num_latent2, 16)
In [134]:
# Clique #1: outing summit elevation and ski rating
num_latent_c1 = num_latent2
input_c1 = layers.Input(num_latent_c1, name='latent_c1')
c1 = gen_c1(input_c1)
# Clique #2: temperatures and day of season
num_latent_c2 = num_latent2 - num_features_c1
input_c2 = layers.Input(num_latent_c2, name='latent_c2')
input_c2b = layers.concatenate([c1, input_c2])
c2 = gen_c2(input_c2b)
# Clique #3: all other dependent features
num_latent_c3 = num_latent2 - num_features_c1 - num_features_c2
input_c3 = layers.Input(num_latent_c3, name='latent_c3')
input_c3b = layers.concatenate([c1, c2, input_c3])
c3 = gen_c3(input_c3b)
# Output of generator
gen2_output = layers.concatenate([c1, c2, c3], name='generator_output')
generator2 = models.Model([input_c1, input_c2, input_c3], gen2_output, name='generator2')
generator2.compile()
generator2.summary()
Model: "generator2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
==================================================================================================
latent_c1 (InputLayer)          [(None, 10)]         0                                            
__________________________________________________________________________________________________
gen_c1 (Sequential)             (None, 2)            2210        latent_c1[0][0]                  
__________________________________________________________________________________________________
latent_c2 (InputLayer)          [(None, 8)]          0                                            
__________________________________________________________________________________________________
concatenate_10 (Concatenate)    (None, 10)           0           gen_c1[1][0]                     
                                                                 latent_c2[0][0]                  
__________________________________________________________________________________________________
gen_c2 (Sequential)             (None, 4)            2356        concatenate_10[0][0]             
__________________________________________________________________________________________________
latent_c3 (InputLayer)          [(None, 4)]          0                                            
__________________________________________________________________________________________________
concatenate_11 (Concatenate)    (None, 10)           0           gen_c1[1][0]                     
                                                                 gen_c2[1][0]                     
                                                                 latent_c3[0][0]                  
__________________________________________________________________________________________________
gen_c3 (Sequential)             (None, 3)            3811        concatenate_11[0][0]             
__________________________________________________________________________________________________
generator_output (Concatenate)  (None, 9)            0           gen_c1[1][0]                     
                                                                 gen_c2[1][0]                     
                                                                 gen_c3[1][0]                     
==================================================================================================
Total params: 8,377
Trainable params: 8,377
Non-trainable params: 0
__________________________________________________________________________________________________
In [40]:
discriminator2 = models.Sequential([
    layers.Dense(64, input_dim=num_features, name='d_1', activation=activations.relu),
    layers.Dropout(0.3),
    layers.Dense(48, name='d_2', activation=activations.relu),
    layers.Dropout(0.2),
    layers.Dense(48, name='d_3', activation=activations.relu),
    layers.Dense(32, name='d_4', activation=activations.relu),
    layers.Dropout(0.2),
    layers.Dense(1, name='d_5') # , activation='sigmoid' linear activation to output logits
], name='discriminator2')
discriminator2.compile()
In [133]:
tf.keras.utils.plot_model(generator2, show_shapes=True, dpi=64)
Out[133]:

Train 2

In [41]:
@tf.function
def train_step2(generator, discriminator, 
               generator_optimizer, discriminator_optimizer, 
               generator_latent, batch, 
               epoch, summary_writer):
    with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
        
        gen_latent = generator_latent()
        
        gen_output = generator(gen_latent, training=True)

        disc_real_output = discriminator(batch, training=True)
        disc_generated_output = discriminator(gen_output, training=True)

        gen_loss = generator_loss(disc_generated_output)
        disc_loss = discriminator_loss(disc_real_output, disc_generated_output)

        generator_gradients = gen_tape.gradient(gen_loss, generator.trainable_variables)
        discriminator_gradients = disc_tape.gradient(disc_loss, discriminator.trainable_variables)

    generator_optimizer.apply_gradients(zip(generator_gradients, generator.trainable_variables))
    discriminator_optimizer.apply_gradients(zip(discriminator_gradients, discriminator.trainable_variables))

    with summary_writer.as_default():
        tf.summary.scalar('gen_loss', gen_loss, step=epoch)
        tf.summary.scalar('disc_loss', disc_loss, step=epoch)
        
    return gen_loss, disc_loss
In [42]:
generator_optimizer2 = tf.keras.optimizers.Adam(4e-4, beta_1=0.3)
discriminator_optimizer2 = tf.keras.optimizers.Adam(4e-4, beta_1=0.3)
In [43]:
tv_plot = tv.train.PlotMetrics(wait_num=200, columns=2, iter_num=epochs * batch_per_epoch)
summary2 = get_summary_writer()

def generator_latent2():
    return [tf.random.normal((batch_size, num_latent_c1), 0, 1), 
            tf.random.normal((batch_size, num_latent_c2), 0, 1), 
            tf.random.normal((batch_size, num_latent_c3), 0, 1)]

for epoch in range(epochs):

    for b in range(batch_per_epoch):
        train_batch = df_sel_scaled[b * batch_size:(b+1) * batch_size]
        
        g_loss, d_loss = train_step2(generator2, discriminator2, 
                                    generator_optimizer2, discriminator_optimizer2,
                                    generator_latent2, train_batch, epoch, summary2)
        # Plot
        tv_plot.update({ 'discriminator_loss': d_loss,# 'discriminator_acc': d_acc,
                        'generator_loss': g_loss, # 'generator_acc': g_acc
                       })
        tv_plot.draw()

    # saving (checkpoint) the model every 20 epochs
    #if (epoch + 1) % 20 == 0:
    #  checkpoint.save(file_prefix = checkpoint_prefix)

Test 2

In [44]:
num_test = 20000
gen_latent2 = [np.random.normal(0, 1, (num_test, num_latent_c1)), 
               np.random.normal(0, 1, (num_test, num_latent_c2)), 
               np.random.normal(0, 1, (num_test, num_latent_c3))]
gen_outings2 = generator2.predict(gen_latent2)
gen_outings2_unscaled = scaler.inverse_transform(gen_outings2)
In [45]:
scores2 = tf.sigmoid(discriminator2.predict(gen_outings2))
fooled2 = scores2 >= 0.5
In [46]:
scores2.numpy().mean(), fooled2.numpy().mean()
Out[46]:
(0.48396152, 0.42915)
In [47]:
df_generated2 = pd.DataFrame(gen_outings2_unscaled, columns=used_cols)
df_generated2['ski_rating'] = df_generated2['ski_rating_num'].round().clip(0, 17).replace(ski_rating_unmapper)
df_generated2['condition_rating'] = df_generated2['condition_rating_num'].round().clip(0, 4).replace(rating_unmapper)
In [48]:
df_generated2_fooled = df_generated2.loc[fooled2.numpy()]
In [49]:
df_generated2.describe()
Out[49]:
ski_rating_num elevation_max TEMPERATURE_MORNING_C temp_morning_7d temp_morning_30d day_of_season elevation_up_snow elevation_down_snow condition_rating_num
count 20000.000000 20000.000000 20000.000000 20000.000000 20000.000000 20000.000000 20000.000000 20000.000000 20000.000000
mean 6.269345 2516.115234 -5.908023 -4.355381 -4.985327 11.292784 1383.596802 1395.721924 3.133009
std 2.727489 486.370483 5.949190 4.526345 3.376279 42.924587 393.201874 383.494019 0.625418
min 0.355422 1612.225952 -24.786013 -17.898863 -12.391416 -113.597267 901.643982 897.716492 0.162391
25% 3.885675 2181.927429 -10.377267 -7.640029 -7.458173 -21.975782 1139.769318 1158.760193 2.780935
50% 5.752159 2353.187256 -6.007393 -5.025619 -5.633104 13.062337 1282.455383 1303.570496 3.231694
75% 8.452662 2722.252563 -1.669220 -1.602756 -3.230154 44.800600 1433.980194 1452.335754 3.596721
max 14.965883 5447.074219 13.945936 14.713383 13.018071 149.852905 3740.555908 3762.617188 4.714316
In [148]:
generated_corr2 = df_generated2.corr()
px.imshow(generated_corr2, height=500)
In [145]:
corr_diff_2 = (generated_corr2 - sel_corr).abs()
print(f'Mean absolute error on correlations: {corr_diff_2.values.mean()}')

px.imshow(corr_diff_2, zmax=0.3, color_continuous_scale='viridis',
          title='Absolute correlation differences between train and generated', height=500)
Mean absolute error on correlations: 0.046806750986417484

Elevations

In [122]:
print_stats({'true': df_sel, 'generated': df_generated2}, 'elevation_up_snow', 'Skis on, way up', True)
print_stats({'true': df_sel, 'generated': df_generated2}, 'elevation_down_snow', 'Skis off, way down', True)
print_stats({'true': df_sel, 'generated': df_generated2}, 'elevation_max', 'Elevation max', True)
fig = sp.make_subplots(rows=1, cols=3, shared_yaxes=True, subplot_titles=['Skis on, way up', 'Skis off, way down', 'Elevation max'], x_title='Elevation [m]')
bins = {'start': 900, 'end': 4000, 'size': 100}
fig.add_trace(go.Histogram(x=df_sel['elevation_up_snow'], xbins=bins, name='real', histnorm='percent', marker=style_true), row=1, col=1)
fig.add_trace(go.Histogram(x=df_generated2['elevation_up_snow'], xbins=bins, name='generated', histnorm='percent', marker=style_generated), row=1, col=1)
fig.add_trace(go.Histogram(x=df_generated2_fooled['elevation_up_snow'], xbins=bins, name='fooled', histnorm='percent', marker=style_fooled), row=1, col=1)
fig.add_trace(go.Histogram(x=df_sel['elevation_down_snow'], xbins=bins, name='real', histnorm='percent', marker=style_true), row=1, col=2)
fig.add_trace(go.Histogram(x=df_generated2['elevation_down_snow'], xbins=bins, name='generated', histnorm='percent', marker=style_generated), row=1, col=2)
fig.add_trace(go.Histogram(x=df_generated2_fooled['elevation_down_snow'], xbins=bins, name='fooled', histnorm='percent', marker=style_fooled), row=1, col=2)
fig.add_trace(go.Histogram(x=df_sel['elevation_max'], xbins=bins, name='real', histnorm='percent', marker=style_true), row=1, col=3)
fig.add_trace(go.Histogram(x=df_generated2['elevation_max'], xbins=bins, name='generated', histnorm='percent', marker=style_generated), row=1, col=3)
fig.add_trace(go.Histogram(x=df_generated2_fooled['elevation_max'], xbins=bins, name='fooled', histnorm='percent', marker=style_fooled), row=1, col=3)
Mean/std Skis on, way up: for true=1464.1/604.6, for generated=1383.6/393.2
Mean/std Skis off, way down: for true=1419.4/538.6, for generated=1395.7/383.5
Mean/std Elevation max: for true=2528.8/632.9, for generated=2516.1/486.4

Day of season

In [97]:
print_stats({'true': df_sel, 'generated': df_generated2, 'fooled': df_generated2_fooled}, 'day_of_season', 'Day of season')

fig = go.Figure(layout=dict(title='Day of season (mid season = Feb 15th)', bargroupgap=0.01, 
                            xaxis=dict(title='Day relative to Feb 15th'), yaxis=dict(title='%')))
fig.add_histogram(x=df_sel.day_of_season, name='true', nbinsx=52, histnorm='percent', marker=style_true)
fig.add_histogram(x=df_generated2.day_of_season, name='generated', nbinsx=52, histnorm='percent', marker=style_generated)
fig.add_histogram(x=df_generated2_fooled.day_of_season, name='fooled', nbinsx=52, histnorm='percent', marker=style_fooled)
Mean/std Day of season:  for true=7.71/46.9, for generated=11.3/42.9, for fooled=8.22/41.6

Ski ratings

In [54]:
print(f'Mean/std ski rating num, for true={df_sel.ski_rating_num.mean():.3g}/{df_sel.ski_rating_num.std():.3g}, for generated={df_generated2.ski_rating_num.mean():.3g}/{df_generated.ski_rating_num.std():.3g}')
fig = go.Figure(layout=dict(title='Ski rating', bargroupgap=0.01, 
                            xaxis=dict(title='1.1 (easy) to 5.6 (extreme)', categoryorder='array', categoryarray=list(ski_rating_unmapper.values()), type="category"), yaxis=dict(title='%')))
fig.add_histogram(x=df_outings[condition].ski_rating, name='true', histnorm='percent', marker=style_true)
fig.add_histogram(x=df_generated2.ski_rating, name='generated', histnorm='percent', marker=style_generated)
fig.add_histogram(x=df_generated2_fooled.ski_rating, name='fooled', histnorm='percent', marker=style_fooled)
Mean/std ski rating num, for true=6.38/3.07, for generated=6.27/2.14

Condition ratings

In [55]:
print(f'Mean/std condition rating num, for true={df_sel.condition_rating_num.mean():.3g}/{df_sel.condition_rating_num.std():.3g}, for generated={df_generated2.condition_rating_num.mean():.3g}/{df_generated2.condition_rating_num.std():.3g}')
fig = go.Figure(layout=dict(title='Condition rating', bargroupgap=0.01, 
                            xaxis=dict(categoryorder='array', categoryarray=list(rating_unmapper.values()), type="category"), yaxis=dict(title='%')))
fig.add_histogram(x=df_outings[condition].condition_rating, name='true', histnorm='percent', marker=style_true)
fig.add_histogram(x=df_generated2.condition_rating, name='generated', histnorm='percent', marker=style_generated)
Mean/std condition rating num, for true=2.95/0.792, for generated=3.13/0.625

Temperatures

In [56]:
print(f'Mean/std morning temperature, for true={df_sel.TEMPERATURE_MORNING_C.mean():.3g}/{df_sel.TEMPERATURE_MORNING_C.std():.3g}, for generated={df_generated2.TEMPERATURE_MORNING_C.mean():.3g}/{df_generated2.TEMPERATURE_MORNING_C.std():.3g}')
print(f'Mean/std last 7 day morning temperature, for true={df_sel.temp_morning_7d.mean():.3g}/{df_sel.temp_morning_7d.std():.3g}, for generated={df_generated2.temp_morning_7d.mean():.3g}/{df_generated2.temp_morning_7d.std():.3g}')
print(f'Mean/std last 30 day morning temperature, for true={df_sel.temp_morning_30d.mean():.3g}/{df_sel.temp_morning_30d.std():.3g}, for generated={df_generated2.temp_morning_30d.mean():.3g}/{df_generated2.temp_morning_30d.std():.3g}')

fig = sp.make_subplots(rows=1, cols=3, shared_yaxes=True, subplot_titles=['today', 'last 7 day', 'last 30 day'],
                      x_title='Morning temperature [°C]')
# fig = go.Figure(layout=dict(title='Trou de la Mouche, morning temperature [°C]', bargroupgap=0.1, yaxis=dict(title='%')))
bins = {'start': -35, 'end': 30, 'size': 1}
fig.add_histogram(x=df_sel.TEMPERATURE_MORNING_C, name='true', histnorm='percent', xbins=bins, marker=style_true, row=1, col=1)
fig.add_histogram(x=df_generated2.TEMPERATURE_MORNING_C, name='generated', histnorm='percent', xbins=bins, marker=style_generated, row=1, col=1)
fig.add_histogram(x=df_sel.temp_morning_7d, name='true', histnorm='percent', xbins=bins, marker=style_true, row=1, col=2)
fig.add_histogram(x=df_generated2.temp_morning_7d, name='generated', histnorm='percent', xbins=bins, marker=style_generated, row=1, col=2)
fig.add_histogram(x=df_sel.temp_morning_30d, name='true', histnorm='percent', xbins=bins, marker=style_true, row=1, col=3)
fig.add_histogram(x=df_generated2.temp_morning_30d, name='generated', histnorm='percent', xbins=bins, marker=style_generated, row=1, col=3)
fig.update_yaxes(title='%')
Mean/std morning temperature, for true=-5.31/6.7, for generated=-5.91/5.95
Mean/std last 7 day morning temperature, for true=-4.79/5.38, for generated=-4.36/4.53
Mean/std last 30 day morning temperature, for true=-4.72/4.23, for generated=-4.99/3.38
In [57]:
px.scatter_matrix(df_generated2[['TEMPERATURE_MORNING_C', 'temp_morning_7d', 'temp_morning_30d', 'elevation_up_snow', 'elevation_down_snow']], opacity=0.1, 
                  title='Temperature-elevation correlations for generated data', height=600)

Test on a given outing

Trou de la Mouche: https://www.camptocamp.org/waypoints/37312/fr/trou-de-la-mouche 508 ski outings

Trou de la Mouche, Paccaly => Grand Crêt: https://www.camptocamp.org/outings?r=46396&act=skitouring 100 ski outings

In [58]:
def scale_single(scaler, index, data):
    return (data - scaler.mean_[index]) / scaler.scale_[index]

def unscale_single(scaler, index, data):
    return (data * scaler.scale_[index]) + scaler.mean_[index]

def shift_scaled(scaler, index, data, offset):
    """ Shift all samples with a constant offset on a scaled data """
    unscaled = unscale_single(scaler, index, data)
    return scale_single(scaler, index, unscaled + offset)
In [59]:
route_title = 'trou-de-la-mouche'
df_route_true = pd.read_parquet(f'data/C2C/outings_{route_title}.parquet')
In [60]:
tdlm_label = 'Trou de la Mouche'
tdlm_elevation_max = 2453
tdlm_elevation_max_scaled = scale_single(scaler, used_cols.index('elevation_max'), tdlm_elevation_max)
tdlm_ski_rating = 6
tdlm_ski_rating_scaled = scale_single(scaler, used_cols.index('ski_rating_num'), tdlm_ski_rating)
tdlm_num = 5000
In [61]:
tdlm_c1 = np.ones([tdlm_num, 1]).dot([[tdlm_ski_rating_scaled, tdlm_elevation_max_scaled]])
In [62]:
tdlm_c2 = gen_c2.predict(np.c_[tdlm_c1, 
                               np.random.normal(0, 1, [tdlm_num, num_latent_c2])])
In [63]:
tdlm_c3 = gen_c3.predict(np.c_[tdlm_c1, 
                               tdlm_c2, 
                               np.random.normal(0, 1, [tdlm_num, num_latent_c3])])
In [64]:
df_tdlm = pd.DataFrame(scaler.inverse_transform(np.c_[tdlm_c1, 
                                                      tdlm_c2, 
                                                      tdlm_c3]), 
                       columns=used_cols)

#df_tdlm['ski_rating'] = df_tdlm['ski_rating_num'].round().clip(0, 17).replace(ski_rating_unmapper)
df_tdlm['condition_rating'] = df_tdlm['condition_rating_num'].round().clip(0, 4).replace(rating_unmapper)
In [65]:
tdlm_scores = tf.sigmoid(discriminator2.predict(np.c_[tdlm_c1, 
                                                      tdlm_c2, 
                                                      tdlm_c3]))
tdlm_fooled = tdlm_scores >= 0.5
tdlm_scores.numpy().mean(), tdlm_fooled.numpy().mean()
Out[65]:
(0.51748145, 0.5066)
In [66]:
df_tdlm_fooled = df_tdlm[tdlm_fooled.numpy()]
len(df_tdlm_fooled)
Out[66]:
2533
In [67]:
df_tdlm.head()
Out[67]:
ski_rating_num elevation_max TEMPERATURE_MORNING_C temp_morning_7d temp_morning_30d day_of_season elevation_up_snow elevation_down_snow condition_rating_num condition_rating
0 6.0 2453.0 -11.954268 -9.823621 -8.549357 25.053149 1123.481718 1156.144686 2.459505 average
1 6.0 2453.0 -2.291726 -2.662566 -4.024036 29.712489 1101.107544 1103.091379 3.963119 excellent
2 6.0 2453.0 -5.171424 -3.428711 -4.398896 -38.405592 1367.348348 1380.962696 2.798571 good
3 6.0 2453.0 -5.094637 -3.698594 -5.287525 44.016528 1117.914460 1122.245456 3.922287 excellent
4 6.0 2453.0 -3.004414 -1.956645 -1.945710 -59.972162 1481.528883 1493.576576 3.184476 good
In [68]:
df_tdlm_fooled.head()
Out[68]:
ski_rating_num elevation_max TEMPERATURE_MORNING_C temp_morning_7d temp_morning_30d day_of_season elevation_up_snow elevation_down_snow condition_rating_num condition_rating
1 6.0 2453.0 -2.291726 -2.662566 -4.024036 29.712489 1101.107544 1103.091379 3.963119 excellent
3 6.0 2453.0 -5.094637 -3.698594 -5.287525 44.016528 1117.914460 1122.245456 3.922287 excellent
4 6.0 2453.0 -3.004414 -1.956645 -1.945710 -59.972162 1481.528883 1493.576576 3.184476 good
6 6.0 2453.0 0.455279 1.514053 0.151047 72.284229 1454.779573 1457.502282 3.852809 excellent
10 6.0 2453.0 -15.577923 -6.737218 -6.781623 -19.162062 1421.540097 1440.182504 3.058459 good

Elevations

In [114]:
fig = sp.make_subplots(rows=1, cols=2, shared_yaxes=True, subplot_titles=['Skis on, way up', 'Skis off, way down', 'Max'],
                      x_title="Trou de la Mouche, elevations' cumulative histogram [m]")
bins = {'start': 0, 'end': 2000, 'size': 25}
fig.add_trace(go.Histogram(x=df_route_true['elevation_up_snow'], xbins=bins, name='true', histnorm='percent', marker=style_true), row=1, col=1)
fig.add_trace(go.Histogram(x=df_tdlm['elevation_up_snow'], xbins=bins, name='generated', histnorm='percent', cumulative_enabled=True, marker=style_generated), row=1, col=1)
# fig.add_trace(go.Histogram(x=df_tdlm_fooled['elevation_up_snow'], xbins=bins, name='fooled', histnorm='percent', cumulative_enabled=True, marker=style_fooled), row=1, col=1)
fig.add_trace(go.Histogram(x=df_route_true['elevation_down_snow'], xbins=bins, name='true', histnorm='percent', cumulative_enabled=True, marker=style_true), row=1, col=2)
fig.add_trace(go.Histogram(x=df_tdlm['elevation_down_snow'], xbins=bins, name='generated', histnorm='percent', cumulative_enabled=True, marker=style_generated), row=1, col=2)
# fig.add_trace(go.Histogram(x=df_tdlm_fooled['elevation_down_snow'], xbins=bins, name='fooled', histnorm='percent', cumulative_enabled=True, marker=style_fooled), row=1, col=2)

Day of season

In [115]:
print(f'Mean/std day of season, for true={df_route_true.day_of_season.mean():.3g}/{df_route_true.day_of_season.std():.3g}, for generated={df_generated.day_of_season.mean():.3g}/{df_generated.day_of_season.std():.3g}')

fig = go.Figure(layout=dict(title='Trou de la Mouche, day of season (mid season = Feb 15th)', bargroupgap=0.01, 
                            xaxis=dict(title='Day relative to Feb 15th'), yaxis=dict(title='%')))
fig.add_histogram(x=df_route_true.day_of_season, name='true', nbinsx=52, histnorm='percent', marker=style_true)
fig.add_histogram(x=df_tdlm.day_of_season, name='generated', nbinsx=52, histnorm='percent', marker=style_generated)
# fig.add_histogram(x=df_tdlm_fooled.day_of_season, name='fooled', nbinsx=52, histnorm='percent', marker=style_fooled)
Mean/std day of season, for true=14.2/40.6, for generated=-0.118/40.4

Condition ratings

In [71]:
print(f'Mean/std condition rating num, for true={df_route_true.condition_rating_num.mean():.3g}/{df_route_true.condition_rating_num.std():.3g}, for generated={df_generated.condition_rating_num.mean():.3g}/{df_generated.condition_rating_num.std():.3g}')
fig = go.Figure(layout=dict(title='Trou de la Mouche, condition rating', bargroupgap=0.01, 
                            xaxis=dict(categoryorder='array', categoryarray=list(rating_unmapper.values()), type="category"), yaxis=dict(title='%')))
fig.add_histogram(x=df_route_true.condition_rating, name='true', histnorm='percent', marker=style_true)
fig.add_histogram(x=df_tdlm.condition_rating, name='generated', histnorm='percent', marker=style_generated)
fig.add_histogram(x=df_tdlm_fooled.condition_rating, name='fooled', histnorm='percent', marker=style_fooled)
Mean/std condition rating num, for true=2.85/0.825, for generated=3.25/0.564

Temperatures Trou de la Mouche

In [72]:
print(f'Mean/std morning temperature, for true={df_route_true.TEMPERATURE_MORNING_C.mean():.3g}/{df_route_true.TEMPERATURE_MORNING_C.std():.3g}, for generated={df_tdlm.TEMPERATURE_MORNING_C.mean():.3g}/{df_tdlm.TEMPERATURE_MORNING_C.std():.3g}')
print(f'Mean/std last 7 day morning temperature, for true={df_route_true.temp_morning_7d.mean():.3g}/{df_route_true.temp_morning_7d.std():.3g}, for generated={df_tdlm.temp_morning_7d.mean():.3g}/{df_tdlm.temp_morning_7d.std():.3g}')
print(f'Mean/std last 30 day morning temperature, for true={df_route_true.temp_morning_30d.mean():.3g}/{df_route_true.temp_morning_30d.std():.3g}, for generated={df_tdlm.temp_morning_30d.mean():.3g}/{df_tdlm.temp_morning_30d.std():.3g}')

fig = sp.make_subplots(rows=1, cols=3, shared_yaxes=True, subplot_titles=['today', 'last 7 day', 'last 30 day'],
                      x_title='Trou de la Mouche, morning temperature [°C]')
# fig = go.Figure(layout=dict(title='Trou de la Mouche, morning temperature [°C]', bargroupgap=0.1, yaxis=dict(title='%')))
bins = {'start': -35, 'end': 30, 'size': 1}
fig.add_histogram(x=df_route_true.TEMPERATURE_MORNING_C, name='true', histnorm='percent', xbins=bins, marker=style_true, row=1, col=1)
fig.add_histogram(x=df_tdlm.TEMPERATURE_MORNING_C, name='generated', histnorm='percent', xbins=bins, marker=style_generated, row=1, col=1)
fig.add_histogram(x=df_tdlm_fooled.TEMPERATURE_MORNING_C, name='fooled', histnorm='percent', xbins=bins, marker=style_fooled, row=1, col=1)
fig.add_histogram(x=df_route_true.temp_morning_7d, name='true', histnorm='percent', xbins=bins, marker=style_true, row=1, col=2)
fig.add_histogram(x=df_tdlm.temp_morning_7d, name='generated', histnorm='percent', xbins=bins, marker=style_generated, row=1, col=2)
fig.add_histogram(x=df_tdlm_fooled.temp_morning_7d, name='fooled', histnorm='percent', xbins=bins, marker=style_fooled, row=1, col=2)
fig.add_histogram(x=df_route_true.temp_morning_30d, name='true', histnorm='percent', xbins=bins, marker=style_true, row=1, col=3)
fig.add_histogram(x=df_tdlm.temp_morning_30d, name='generated', histnorm='percent', xbins=bins, marker=style_generated, row=1, col=3)
fig.add_histogram(x=df_tdlm_fooled.temp_morning_30d, name='fooled', histnorm='percent', xbins=bins, marker=style_fooled, row=1, col=3)
fig.update_yaxes(title='%')
Mean/std morning temperature, for true=-3.73/5.45, for generated=-6.72/5.81
Mean/std last 7 day morning temperature, for true=-4.17/4.87, for generated=-4.98/4.35
Mean/std last 30 day morning temperature, for true=-4.92/3.56, for generated=-5.43/3.2
In [73]:
px.scatter_matrix(df_tdlm[['TEMPERATURE_MORNING_C', 'temp_morning_7d', 'temp_morning_30d', 'elevation_up_snow', 'elevation_down_snow']], opacity=0.1, 
                  title='Trou de la Mouche, temperature-elevation correlations of generated data', height=600)

Global warming impact

All temperature are raised by 4 degrees

Global warming has been found to be doubled in the Alps:

In [74]:
delta_temp = 4
In [75]:
c2_warm = [*[shift_scaled(scaler, used_cols.index(feature), tdlm_c2[:,i], delta_temp) for i, feature in enumerate(temperature_features.keys())], tdlm_c2[:,-1]]
tdlm_c2_warm = pd.DataFrame(np.array(c2_warm).T, columns=features_c2)
In [76]:
tdlm_c3_warm = gen_c3.predict(np.c_[tdlm_c1, 
                                    tdlm_c2_warm,
                                    np.random.normal(0, 1, [tdlm_num, num_latent_c3])])
In [77]:
df_tdlm_warm = pd.DataFrame(scaler.inverse_transform(np.c_[tdlm_c1, 
                                                           tdlm_c2_warm,
                                                           tdlm_c3_warm]), 
                       columns=used_cols)

df_tdlm_warm['condition_rating'] = df_tdlm_warm['condition_rating_num'].round().clip(0, 4).replace(rating_unmapper)
In [78]:
tdlm_scores_warm = tf.sigmoid(discriminator2.predict(np.c_[tdlm_c1, 
                                                           tdlm_c2_warm, 
                                                           tdlm_c3_warm]))
tdlm_fooled_warm = tdlm_scores_warm >= 0.5
tdlm_scores_warm.numpy().mean(), tdlm_fooled_warm.numpy().mean()
Out[78]:
(0.52740234, 0.6814)

Elevations with global warming

In [112]:
print(f"Skis on on the way up median: now = {df_tdlm['elevation_up_snow'].median():.1f}, with global warming of {delta_temp}°C = {df_tdlm_warm['elevation_up_snow'].median():.1f}")
print(f"Skis on on the way up median: now = {df_tdlm['elevation_down_snow'].median():.1f}, with global warming of {delta_temp}°C = {df_tdlm_warm['elevation_down_snow'].median():.1f}")
fig = sp.make_subplots(rows=1, cols=2, shared_yaxes=True, subplot_titles=['Skis on, way up', 'Skis off, way down', 'Max'],
                      x_title='Trou de la Mouche, generated elevations [m] as cumulative histograms')
bins = {'start': 900, 'end': 2000, 'size': 25}
fig.add_histogram(x=df_tdlm['elevation_up_snow'], xbins=bins, name='now', histnorm='percent', cumulative_enabled=True, marker=style_generated, row=1, col=1)
fig.add_histogram(x=df_tdlm_warm['elevation_up_snow'], xbins=bins, name='with warming', cumulative_enabled=True, histnorm='percent', marker=style_warm, row=1, col=1)
fig.add_histogram(x=df_tdlm['elevation_down_snow'], xbins=bins, name='now', histnorm='percent', cumulative_enabled=True, marker=style_generated, row=1, col=2)
fig.add_histogram(x=df_tdlm_warm['elevation_down_snow'], xbins=bins, name='with warming', histnorm='percent', cumulative_enabled=True, marker=style_warm, row=1, col=2)
Skis on on the way up median: now = 1336.4, with global warming of 4°C = 1433.1
Skis on on the way up median: now = 1349.0, with global warming of 4°C = 1447.2

Condition ratings with warming

In [80]:
print(f'Mean/std condition rating, now={df_tdlm.condition_rating_num.mean():.3g}/{df_tdlm.condition_rating_num.std():.3g}, with warming={df_tdlm_warm.condition_rating_num.mean():.3g}/{df_tdlm_warm.condition_rating_num.std():.3g}, ')
fig = go.Figure(layout=dict(title='Trou de la Mouche, condition rating', bargroupgap=0.01, 
                            xaxis=dict(categoryorder='array', categoryarray=list(rating_unmapper.values()), type="category"), yaxis=dict(title='%')))
fig.add_histogram(x=df_tdlm.condition_rating, name='now', histnorm='percent', marker=style_generated)
fig.add_histogram(x=df_tdlm_warm.condition_rating, name='with warming', histnorm='percent', marker=style_warm)
Mean/std condition rating, now=3.09/0.652, with warming=3.07/0.642, 
In [ ]: